/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    AprioriItemSet.java
 *    Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.associations;

import weka.core.ContingencyTables;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Hashtable;

/**
 * Class for storing a set of items. Item sets are stored in a lexicographic
 * order, which is determined by the header information of the set of instances
 * used for generating the set of items. All methods in this class assume that
 * item sets are stored in lexicographic order. The class provides methods that
 * are used in the Apriori algorithm to construct association rules.
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision: 1.6 $
 */
public class AprioriItemSet extends ItemSet implements Serializable,
		RevisionHandler {

	/** for serialization */
	static final long serialVersionUID = 7684467755712672058L;

	/**
	 * Constructor
	 * 
	 * @param totalTrans
	 *            the total number of transactions in the data
	 */
	public AprioriItemSet(int totalTrans) {
		super(totalTrans);
	}

	/**
	 * Outputs the confidence for a rule.
	 * 
	 * @param premise
	 *            the premise of the rule
	 * @param consequence
	 *            the consequence of the rule
	 * @return the confidence on the training data
	 */
	public static double confidenceForRule(AprioriItemSet premise,
			AprioriItemSet consequence) {

		return (double) consequence.m_counter / (double) premise.m_counter;
	}

	/**
	 * Outputs the lift for a rule. Lift is defined as:<br>
	 * confidence / prob(consequence)
	 * 
	 * @param premise
	 *            the premise of the rule
	 * @param consequence
	 *            the consequence of the rule
	 * @param consequenceCount
	 *            how many times the consequence occurs independent of the
	 *            premise
	 * @return the lift on the training data
	 */
	public double liftForRule(AprioriItemSet premise,
			AprioriItemSet consequence, int consequenceCount) {
		double confidence = confidenceForRule(premise, consequence);

		return confidence
				/ ((double) consequenceCount / (double) m_totalTransactions);
	}

	/**
	 * Outputs the leverage for a rule. Leverage is defined as: <br>
	 * prob(premise & consequence) - (prob(premise) * prob(consequence))
	 * 
	 * @param premise
	 *            the premise of the rule
	 * @param consequence
	 *            the consequence of the rule
	 * @param premiseCount
	 *            how many times the premise occurs independent of the
	 *            consequent
	 * @param consequenceCount
	 *            how many times the consequence occurs independent of the
	 *            premise
	 * @return the leverage on the training data
	 */
	public double leverageForRule(AprioriItemSet premise,
			AprioriItemSet consequence, int premiseCount, int consequenceCount) {
		double coverageForItemSet = (double) consequence.m_counter
				/ (double) m_totalTransactions;
		double expectedCoverageIfIndependent = ((double) premiseCount / (double) m_totalTransactions)
				* ((double) consequenceCount / (double) m_totalTransactions);
		double lev = coverageForItemSet - expectedCoverageIfIndependent;
		return lev;
	}

	/**
	 * Outputs the conviction for a rule. Conviction is defined as: <br>
	 * prob(premise) * prob(!consequence) / prob(premise & !consequence)
	 * 
	 * @param premise
	 *            the premise of the rule
	 * @param consequence
	 *            the consequence of the rule
	 * @param premiseCount
	 *            how many times the premise occurs independent of the
	 *            consequent
	 * @param consequenceCount
	 *            how many times the consequence occurs independent of the
	 *            premise
	 * @return the conviction on the training data
	 */
	public double convictionForRule(AprioriItemSet premise,
			AprioriItemSet consequence, int premiseCount, int consequenceCount) {
		double num = (double) premiseCount
				* (double) (m_totalTransactions - consequenceCount)
				/ (double) m_totalTransactions;
		double denom = ((premiseCount - consequence.m_counter) + 1);

		if (num < 0 || denom < 0) {
			System.err.println("*** " + num + " " + denom);
			System.err.println("premis count: " + premiseCount
					+ " consequence count " + consequenceCount
					+ " total trans " + m_totalTransactions);
		}
		return num / denom;
	}

	/**
	 * Generates all rules for an item set.
	 * 
	 * @param minConfidence
	 *            the minimum confidence the rules have to have
	 * @param hashtables
	 *            containing all(!) previously generated item sets
	 * @param numItemsInSet
	 *            the size of the item set for which the rules are to be
	 *            generated
	 * @return all the rules with minimum confidence for the given item set
	 */
	public FastVector[] generateRules(double minConfidence,
			FastVector hashtables, int numItemsInSet) {

		FastVector premises = new FastVector(), consequences = new FastVector(), conf = new FastVector();
		FastVector[] rules = new FastVector[3], moreResults;
		AprioriItemSet premise, consequence;
		Hashtable hashtable = (Hashtable) hashtables
				.elementAt(numItemsInSet - 2);

		// Generate all rules with one item in the consequence.
		for (int i = 0; i < m_items.length; i++)
			if (m_items[i] != -1) {
				premise = new AprioriItemSet(m_totalTransactions);
				consequence = new AprioriItemSet(m_totalTransactions);
				premise.m_items = new int[m_items.length];
				consequence.m_items = new int[m_items.length];
				consequence.m_counter = m_counter;

				for (int j = 0; j < m_items.length; j++)
					consequence.m_items[j] = -1;
				System.arraycopy(m_items, 0, premise.m_items, 0, m_items.length);
				premise.m_items[i] = -1;

				consequence.m_items[i] = m_items[i];
				premise.m_counter = ((Integer) hashtable.get(premise))
						.intValue();
				premises.addElement(premise);
				consequences.addElement(consequence);
				conf.addElement(new Double(confidenceForRule(premise,
						consequence)));
			}
		rules[0] = premises;
		rules[1] = consequences;
		rules[2] = conf;
		pruneRules(rules, minConfidence);

		// Generate all the other rules
		moreResults = moreComplexRules(rules, numItemsInSet, 1, minConfidence,
				hashtables);
		if (moreResults != null)
			for (int i = 0; i < moreResults[0].size(); i++) {
				rules[0].addElement(moreResults[0].elementAt(i));
				rules[1].addElement(moreResults[1].elementAt(i));
				rules[2].addElement(moreResults[2].elementAt(i));
			}
		return rules;
	}

	/**
	 * Generates all significant rules for an item set.
	 * 
	 * @param minMetric
	 *            the minimum metric (confidence, lift, leverage, improvement)
	 *            the rules have to have
	 * @param metricType
	 *            (confidence=0, lift, leverage, improvement)
	 * @param hashtables
	 *            containing all(!) previously generated item sets
	 * @param numItemsInSet
	 *            the size of the item set for which the rules are to be
	 *            generated
	 * @param numTransactions
	 * @param significanceLevel
	 *            the significance level for testing the rules
	 * @return all the rules with minimum metric for the given item set
	 * @exception Exception
	 *                if something goes wrong
	 */
	public final FastVector[] generateRulesBruteForce(double minMetric,
			int metricType, FastVector hashtables, int numItemsInSet,
			int numTransactions, double significanceLevel) throws Exception {

		FastVector premises = new FastVector(), consequences = new FastVector(), conf = new FastVector(), lift = new FastVector(), lev = new FastVector(), conv = new FastVector();
		FastVector[] rules = new FastVector[6];
		AprioriItemSet premise, consequence;
		Hashtable hashtableForPremise, hashtableForConsequence;
		int numItemsInPremise, help, max, consequenceUnconditionedCounter;
		double[][] contingencyTable = new double[2][2];
		double metric, chiSquared;

		// Generate all possible rules for this item set and test their
		// significance.
		max = (int) Math.pow(2, numItemsInSet);
		for (int j = 1; j < max; j++) {
			numItemsInPremise = 0;
			help = j;
			while (help > 0) {
				if (help % 2 == 1)
					numItemsInPremise++;
				help /= 2;
			}
			if (numItemsInPremise < numItemsInSet) {
				hashtableForPremise = (Hashtable) hashtables
						.elementAt(numItemsInPremise - 1);
				hashtableForConsequence = (Hashtable) hashtables
						.elementAt(numItemsInSet - numItemsInPremise - 1);
				premise = new AprioriItemSet(m_totalTransactions);
				consequence = new AprioriItemSet(m_totalTransactions);
				premise.m_items = new int[m_items.length];

				consequence.m_items = new int[m_items.length];
				consequence.m_counter = m_counter;
				help = j;
				for (int i = 0; i < m_items.length; i++)
					if (m_items[i] != -1) {
						if (help % 2 == 1) {
							premise.m_items[i] = m_items[i];
							consequence.m_items[i] = -1;
						} else {
							premise.m_items[i] = -1;
							consequence.m_items[i] = m_items[i];
						}
						help /= 2;
					} else {
						premise.m_items[i] = -1;
						consequence.m_items[i] = -1;
					}
				premise.m_counter = ((Integer) hashtableForPremise.get(premise))
						.intValue();
				consequenceUnconditionedCounter = ((Integer) hashtableForConsequence
						.get(consequence)).intValue();

				if (metricType == 0) {
					contingencyTable[0][0] = (double) (consequence.m_counter);
					contingencyTable[0][1] = (double) (premise.m_counter - consequence.m_counter);
					contingencyTable[1][0] = (double) (consequenceUnconditionedCounter - consequence.m_counter);
					contingencyTable[1][1] = (double) (numTransactions
							- premise.m_counter
							- consequenceUnconditionedCounter + consequence.m_counter);
					chiSquared = ContingencyTables.chiSquared(contingencyTable,
							false);

					metric = confidenceForRule(premise, consequence);

					if ((!(metric < minMetric))
							&& (!(chiSquared > significanceLevel))) {
						premises.addElement(premise);
						consequences.addElement(consequence);
						conf.addElement(new Double(metric));
						lift.addElement(new Double(liftForRule(premise,
								consequence, consequenceUnconditionedCounter)));
						lev.addElement(new Double(leverageForRule(premise,
								consequence, premise.m_counter,
								consequenceUnconditionedCounter)));
						conv.addElement(new Double(convictionForRule(premise,
								consequence, premise.m_counter,
								consequenceUnconditionedCounter)));
					}
				} else {
					double tempConf = confidenceForRule(premise, consequence);
					double tempLift = liftForRule(premise, consequence,
							consequenceUnconditionedCounter);
					double tempLev = leverageForRule(premise, consequence,
							premise.m_counter, consequenceUnconditionedCounter);
					double tempConv = convictionForRule(premise, consequence,
							premise.m_counter, consequenceUnconditionedCounter);
					switch (metricType) {
					case 1:
						metric = tempLift;
						break;
					case 2:
						metric = tempLev;
						break;
					case 3:
						metric = tempConv;
						break;
					default:
						throw new Exception("ItemSet: Unknown metric type!");
					}
					if (!(metric < minMetric)) {
						premises.addElement(premise);
						consequences.addElement(consequence);
						conf.addElement(new Double(tempConf));
						lift.addElement(new Double(tempLift));
						lev.addElement(new Double(tempLev));
						conv.addElement(new Double(tempConv));
					}
				}
			}
		}
		rules[0] = premises;
		rules[1] = consequences;
		rules[2] = conf;
		rules[3] = lift;
		rules[4] = lev;
		rules[5] = conv;
		return rules;
	}

	/**
	 * Subtracts an item set from another one.
	 * 
	 * @param toSubtract
	 *            the item set to be subtracted from this one.
	 * @return an item set that only contains items form this item sets that are
	 *         not contained by toSubtract
	 */
	public final AprioriItemSet subtract(AprioriItemSet toSubtract) {

		AprioriItemSet result = new AprioriItemSet(m_totalTransactions);

		result.m_items = new int[m_items.length];

		for (int i = 0; i < m_items.length; i++)
			if (toSubtract.m_items[i] == -1)
				result.m_items[i] = m_items[i];
			else
				result.m_items[i] = -1;
		result.m_counter = 0;
		return result;
	}

	/**
	 * Generates rules with more than one item in the consequence.
	 * 
	 * @param rules
	 *            all the rules having (k-1)-item sets as consequences
	 * @param numItemsInSet
	 *            the size of the item set for which the rules are to be
	 *            generated
	 * @param numItemsInConsequence
	 *            the value of (k-1)
	 * @param minConfidence
	 *            the minimum confidence a rule has to have
	 * @param hashtables
	 *            the hashtables containing all(!) previously generated item
	 *            sets
	 * @return all the rules having (k)-item sets as consequences
	 */
	private final FastVector[] moreComplexRules(FastVector[] rules,
			int numItemsInSet, int numItemsInConsequence, double minConfidence,
			FastVector hashtables) {

		AprioriItemSet newPremise;
		FastVector[] result, moreResults;
		FastVector newConsequences, newPremises = new FastVector(), newConf = new FastVector();
		Hashtable hashtable;

		if (numItemsInSet > numItemsInConsequence + 1) {
			hashtable = (Hashtable) hashtables.elementAt(numItemsInSet
					- numItemsInConsequence - 2);
			newConsequences = mergeAllItemSets(rules[1],
					numItemsInConsequence - 1, m_totalTransactions);
			Enumeration enu = newConsequences.elements();
			while (enu.hasMoreElements()) {
				AprioriItemSet current = (AprioriItemSet) enu.nextElement();
				current.m_counter = m_counter;
				newPremise = subtract(current);
				newPremise.m_counter = ((Integer) hashtable.get(newPremise))
						.intValue();
				newPremises.addElement(newPremise);
				newConf.addElement(new Double(confidenceForRule(newPremise,
						current)));
			}
			result = new FastVector[3];
			result[0] = newPremises;
			result[1] = newConsequences;
			result[2] = newConf;
			pruneRules(result, minConfidence);
			moreResults = moreComplexRules(result, numItemsInSet,
					numItemsInConsequence + 1, minConfidence, hashtables);
			if (moreResults != null)
				for (int i = 0; i < moreResults[0].size(); i++) {
					result[0].addElement(moreResults[0].elementAt(i));
					result[1].addElement(moreResults[1].elementAt(i));
					result[2].addElement(moreResults[2].elementAt(i));
				}
			return result;
		} else
			return null;
	}

	/**
	 * Returns the contents of an item set as a string.
	 * 
	 * @param instances
	 *            contains the relevant header information
	 * @return string describing the item set
	 */
	public final String toString(Instances instances) {

		return super.toString(instances);
	}

	/**
	 * Converts the header info of the given set of instances into a set of item
	 * sets (singletons). The ordering of values in the header file determines
	 * the lexicographic order.
	 * 
	 * @param instances
	 *            the set of instances whose header info is to be used
	 * @return a set of item sets, each containing a single item
	 * @exception Exception
	 *                if singletons can't be generated successfully
	 */
	public static FastVector singletons(Instances instances) throws Exception {

		FastVector setOfItemSets = new FastVector();
		ItemSet current;

		for (int i = 0; i < instances.numAttributes(); i++) {
			if (instances.attribute(i).isNumeric())
				throw new Exception("Can't handle numeric attributes!");
			for (int j = 0; j < instances.attribute(i).numValues(); j++) {
				current = new AprioriItemSet(instances.numInstances());
				current.m_items = new int[instances.numAttributes()];
				for (int k = 0; k < instances.numAttributes(); k++)
					current.m_items[k] = -1;
				current.m_items[i] = j;
				setOfItemSets.addElement(current);
			}
		}
		return setOfItemSets;
	}

	/**
	 * Merges all item sets in the set of (k-1)-item sets to create the (k)-item
	 * sets and updates the counters.
	 * 
	 * @param itemSets
	 *            the set of (k-1)-item sets
	 * @param size
	 *            the value of (k-1)
	 * @param totalTrans
	 *            the total number of transactions in the data
	 * @return the generated (k)-item sets
	 */
	public static FastVector mergeAllItemSets(FastVector itemSets, int size,
			int totalTrans) {

		FastVector newVector = new FastVector();
		ItemSet result;
		int numFound, k;

		for (int i = 0; i < itemSets.size(); i++) {
			ItemSet first = (ItemSet) itemSets.elementAt(i);
			out: for (int j = i + 1; j < itemSets.size(); j++) {
				ItemSet second = (ItemSet) itemSets.elementAt(j);
				result = new AprioriItemSet(totalTrans);
				result.m_items = new int[first.m_items.length];

				// Find and copy common prefix of size 'size'
				numFound = 0;
				k = 0;
				while (numFound < size) {
					if (first.m_items[k] == second.m_items[k]) {
						if (first.m_items[k] != -1)
							numFound++;
						result.m_items[k] = first.m_items[k];
					} else
						break out;
					k++;
				}

				// Check difference
				while (k < first.m_items.length) {
					if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
						break;
					else {
						if (first.m_items[k] != -1)
							result.m_items[k] = first.m_items[k];
						else
							result.m_items[k] = second.m_items[k];
					}
					k++;
				}
				if (k == first.m_items.length) {
					result.m_counter = 0;
					newVector.addElement(result);
				}
			}
		}
		return newVector;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 1.6 $");
	}
}
